In [ ]:
# Dr. M. Baron, Statistical Machine Learning class, STAT-427/627
# DIMENSION REDUCTION AND SHRINKAGE
# Part II. Ridge Regression and LASSO
# Import necessary libraries
! pip install pandas;
! pip install numpy;
! pip install scikit-learn;
! pip install matplotlib;
! pip install ISLP;
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ISLP import load_data
from sklearn.linear_model import LassoCV, Ridge, RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
In [50]:
# 1. Prepare the data
# Load the 'boston' data set through ISLP.
boston = load_data('boston')

# Response is the 'medv' column (as a NumPy array); every other column is a predictor.
y = boston['medv'].to_numpy()
X = boston.drop(columns='medv')

# Split into training and test sets: half of the rows each, with a fixed seed
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)
In [51]:
### 2. Ridge Regression ###
# Grid of candidate penalty values (lambda). scikit-learn calls this "alpha".
alphas = np.linspace(0.01, 10, 100)

# Keep the cross-validation errors for every candidate lambda so they can be
# inspected/plotted after fitting. `store_cv_results` replaces the
# `store_cv_values` flag, which is deprecated since scikit-learn 1.5 and
# removed in 1.7.
ridge = RidgeCV(alphas=alphas, store_cv_results=True)
ridge.fit(X_train, y_train)

# Get the best alpha (lambda)
best_lambda_ridge = ridge.alpha_
print("Best Lambda for Ridge: ", best_lambda_ridge)

# Coefficients for Ridge Regression (refit at the selected lambda)
ridge_coefs = ridge.coef_
print("Ridge Coefficients: ", ridge_coefs)
Best Lambda for Ridge: 0.1109090909090909 Ridge Coefficients: [-1.32503607e-01 3.28829322e-02 1.82997027e-02 4.65656980e+00 -1.17597553e+01 4.44253504e+00 -2.21148261e-02 -1.40266266e+00 1.73253351e-01 -8.96885899e-03 -7.83551562e-01 -5.48322821e-01]
C:\Users\baron\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\linear_model\_ridge.py:2341: FutureWarning: 'store_cv_values' is deprecated in version 1.5 and will be removed in 1.7. Use 'store_cv_results' instead. warnings.warn(
In [52]:
# Test MSE for Ridge Regression
# Predict on the held-out test set with the fitted ridge model, then compare
# the predictions against the observed responses.
y_pred_ridge = ridge.predict(X_test)
ridge_mse = mean_squared_error(y_true=y_test, y_pred=y_pred_ridge)
print("Ridge Test MSE: ", ridge_mse)
Ridge Test MSE: 25.70968721232289
In [56]:
# Plot cross-validation error for Ridge
# The stored CV errors are exposed as `cv_results_` in scikit-learn >= 1.5;
# older releases expose the same array as `cv_values_` (deprecated in 1.5,
# removed in 1.7). Prefer the new name and fall back only when it is missing.
if hasattr(ridge, 'cv_results_'):
    ridge_cv_errors = ridge.cv_results_
else:
    ridge_cv_errors = ridge.cv_values_

# Average over the first axis to get one mean CV error per candidate lambda.
ridge_mean_cv_mse = np.mean(ridge_cv_errors, axis=0)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(alphas, ridge_mean_cv_mse, label='Cross-Validation MSE')
# Mark the lambda selected by cross-validation.
ax.axvline(best_lambda_ridge, linestyle='--', color='red', label=f'Best Lambda: {best_lambda_ridge}')
ax.set_xlabel('Lambda')
ax.set_ylabel('Mean Cross-Validation MSE')
ax.set_title('Ridge Regression Cross-Validation Error')
ax.legend()
plt.show()
In [57]:
### Coefficient Path Plot for Ridge ###
# A coefficient path shows how each coefficient changes with the penalty, so a
# separate ridge model is fitted at every candidate lambda. (Tiling the single
# CV-selected coefficient vector across the grid would only draw flat lines.)
ridge_paths = np.array(
    [Ridge(alpha=lam).fit(X_train, y_train).coef_ for lam in alphas]
)  # one row per lambda, one column per predictor

fig, ax = plt.subplots(figsize=(10, 6))
for i in range(ridge_paths.shape[1]):
    ax.plot(alphas, ridge_paths[:, i], linestyle='--')
ax.set_xlabel('Lambda')
ax.set_ylabel('Coefficients')
ax.set_title('Ridge Coefficient Path')
plt.show()
In [58]:
### 3. LASSO Regression ###
# Choose the L1 penalty by 10-fold cross-validation over a fixed lambda grid.
lasso_grid = np.linspace(0.01, 10, 100)
lasso = LassoCV(alphas=lasso_grid, cv=10).fit(X_train, y_train)

# Get the best alpha (lambda)
best_lambda_lasso = lasso.alpha_
print("Best Lambda for LASSO: ", best_lambda_lasso)

# Coefficients for LASSO Regression
print("LASSO Coefficients: ", lasso.coef_)

# Test MSE for LASSO Regression: score predictions on the held-out test set.
y_pred_lasso = lasso.predict(X_test)
lasso_mse = mean_squared_error(y_true=y_test, y_pred=y_pred_lasso)
print("LASSO Test MSE: ", lasso_mse)
Best Lambda for LASSO: 0.01 LASSO Coefficients: [-1.31098756e-01 3.30211944e-02 1.07322974e-02 4.53776335e+00 -1.00423233e+01 4.43183986e+00 -2.33686095e-02 -1.37532817e+00 1.69815535e-01 -9.16659741e-03 -7.66188240e-01 -5.52608791e-01] LASSO Test MSE: 25.777801586607513
In [59]:
### Labeled Coefficient Path Plot for Ridge ###
# Build the path inside this cell so it runs on a fresh kernel: fit one ridge
# model per candidate lambda and stack the coefficient vectors into a 2-D
# array (rows = lambdas, columns = predictors). A plain Python list cannot be
# indexed with `[:, i]`, which is what raised the TypeError here before.
ridge_paths = np.array(
    [Ridge(alpha=lam).fit(X_train, y_train).coef_ for lam in alphas]
)

fig, ax = plt.subplots(figsize=(10, 6))
for i, column in enumerate(X.columns):
    ax.plot(alphas, ridge_paths[:, i], label=column)
ax.set_xlabel('Lambda')
ax.set_ylabel('Coefficients')
ax.set_title('Ridge Coefficient Path')
ax.legend(loc='best')
plt.show()
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[59], line 3 1 plt.figure(figsize=(10, 6)) 2 for i in range(X.shape[1]): ----> 3 plt.plot(alphas, ridge_paths[:, i], label=X.columns[i]) 4 plt.xlabel('Lambda') 5 plt.ylabel('Coefficients') TypeError: list indices must be integers or slices, not tuple
<Figure size 1000x600 with 0 Axes>
In [60]:
# Plot cross-validation error for LASSO
# Average `mse_path_` along axis 1 to obtain one mean CV error per lambda in
# `lasso.alphas_`.
lasso_mean_cv_mse = lasso.mse_path_.mean(axis=1)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(lasso.alphas_, lasso_mean_cv_mse, label='Cross-Validation MSE')
# Mark the lambda selected by cross-validation.
ax.axvline(best_lambda_lasso, linestyle='--', color='red', label=f'Best Lambda: {best_lambda_lasso}')
ax.set_xlabel('Lambda')
ax.set_ylabel('Mean Cross-Validation MSE')
ax.set_title('LASSO Cross-Validation Error')
ax.legend()
plt.show()
In [61]:
### Coefficient Path Plot for LASSO ###
# `lasso.path` works on X and y exactly as given and fits no intercept, so the
# training data is centered first; the resulting coefficients then correspond
# to LASSO fits that include an intercept, like the LassoCV model above.
X_train_centered = np.asarray(X_train, dtype=float)
X_train_centered = X_train_centered - X_train_centered.mean(axis=0)
y_train_centered = y_train - np.mean(y_train)

# Compute the path ONCE (not once per predictor) and on the same lambda grid
# that was cross-validated. Without `alphas=...` the solver builds its own
# grid, which would not line up with `lasso.alphas_` on the x-axis.
lasso_path_alphas, lasso_paths, _ = lasso.path(
    X_train_centered, y_train_centered, alphas=lasso.alphas_
)  # lasso_paths: one row per predictor, one column per lambda

fig, ax = plt.subplots(figsize=(10, 6))
for i, column in enumerate(X.columns):
    # Plot against the lambdas returned by the solver so x and y always match.
    ax.plot(lasso_path_alphas, lasso_paths[i], label=column)
ax.set_xlabel('Lambda')
ax.set_ylabel('Coefficients')
ax.set_title('LASSO Coefficient Path')
ax.legend(loc='best')
plt.show()